# Translation catalog for the PaddleNLP documentation (paddlenlp.transformers.tokenizer_utils).
# Copyright (C) 2021, PaddleNLP
# This file is distributed under the same license as the PaddleNLP package.
# FIRST AUTHOR <EMAIL@ADDRESS>, 2022.
#
#, fuzzy
msgid ""
msgstr ""
"Project-Id-Version: PaddleNLP \n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2022-05-19 14:17+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: Babel 2.10.1\n"

#: ../source/paddlenlp.transformers.tokenizer_utils.rst:2
msgid "tokenizer\\_utils"
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer:1
msgid "基类：:class:`object`"
msgstr "Bases: :class:`object`"

#: of paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer:1
msgid ""
"The base class for all pretrained tokenizers. It mainly provides common "
"methods for loading (construction and loading) and saving pretrained "
"tokenizers. Loading and saving also rely on the following class "
"attributes which should be overridden by derived classes accordingly:"
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer:6
msgid ""
"**tokenizer_config_file** (str): Represents the file name of tokenizer "
"configuration for configuration saving and loading in local file system. "
"The value is `tokenizer_config.json`."
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer:9
msgid ""
"**resource_files_names** (dict): Represents resources to specific file "
"names mapping for resource saving and loading in local file system. The "
"keys of dict representing resource items should be argument names in "
"tokenizer's `__init__` method, and the values are file names for saving "
"and loading corresponding resources. The mostly used resources here are "
"vocabulary file and sentence-piece model file."
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer:15
msgid ""
"**pretrained_init_configuration** (dict): Provides the tokenizer "
"configurations of built-in pretrained tokenizers (contrasts to tokenizers"
" in local file system). It has pretrained tokenizer names as keys (the "
"same as pretrained model names, such as `bert-base-uncased`), and the "
"values are dict preserving corresponding configuration for tokenizer "
"initialization."
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer:20
msgid ""
"**pretrained_resource_files_map** (dict): Provides resource URLs of "
"built-in pretrained tokenizers (contrasts to tokenizers in local file "
"system). It has the same keys as `resource_files_names`, and the values "
"are also `dict` mapping specific pretrained tokenizer names (such as "
"`bert-base-uncased`) to corresponding resource URLs."
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer:26
msgid ""
"Moreover, methods common to tokenizers for tokenization, token/id "
"conversion and encoding as model inputs are also provided here."
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer:29
msgid ""
"Besides, metaclass `InitTrackerMeta` is used to create "
"`PretrainedTokenizer`, by which subclasses can track arguments for "
"initialization automatically and expose special tokens initialization "
"used as attributes."
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.__call__:1
msgid ""
"Performs tokenization and uses the tokenized tokens to prepare model "
"inputs. It supports sequence or sequence pair as input, and batch input "
"is allowed. `self.encode()` or `self.batch_encode()` would be called "
"separately for single or batch input depending on input format and "
"`is_split_into_words` argument."
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.BPETokenizer
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.__call__
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.batch_encode
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.build_inputs_with_special_tokens
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.build_offset_mapping_with_special_tokens
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.convert_tokens_to_string
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.create_token_type_ids_from_sequences
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.encode
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.from_pretrained
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.get_offset_mapping
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.get_special_tokens_mask
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.load_vocabulary
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.num_special_tokens_to_add
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.save_pretrained
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.save_resources
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.save_vocabulary
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.truncate_sequences
msgid "参数"
msgstr "Parameters"

#: of paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.__call__:7
msgid ""
"The sequence or batch of sequences to be processed. One sequence is a "
"string or a list of strings depending on whether it has been "
"pretokenized. If each sequence is provided as a list of strings "
"(pretokenized), you must set `is_split_into_words` as `True` to "
"disambiguate with a batch of sequences."
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.__call__:13
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.encode:9
msgid ""
"Same as `text` argument, while it represents for the latter sequence of "
"the sequence pair."
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.__call__:16
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.batch_encode:10
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.encode:12
msgid ""
"If set to a number, will limit the total sequence returned so that it has"
" a maximum length. If there are overflowing tokens, those overflowing "
"tokens will be added to the returned dictionary when "
"`return_overflowing_tokens` is `True`. Defaults to `None`."
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.__call__:21
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.batch_encode:15
msgid ""
"Only available for batch input of sequence pair and mainly for question "
"answering usage. When for QA, `text` represents questions and `text_pair`"
" represents contexts. If `stride` is set to a positive number, the "
"context will be split into multiple spans where `stride` defines the "
"number of (tokenized) tokens to skip from the start of one span to get "
"the next span, thus will produce a bigger batch than inputs to include "
"all spans. Moreover, 'overflow_to_sample' and 'offset_mapping' preserving"
" the original example and position information will be added to the "
"returned dictionary. Defaults to 0."
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.__call__:31
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.batch_encode:25
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.encode:17
msgid ""
"If set to `True`, the returned sequences would be padded up to "
"`max_seq_len` specified length according to padding side "
"(`self.padding_side`) and padding token id. Defaults to `False`."
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.__call__:35
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.batch_encode:29
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.encode:21
msgid ""
"String selected in the following options:  - 'longest_first' (default) "
"Iteratively reduce the inputs sequence until the input is under "
"`max_seq_len` starting from the longest one at each token (when there is "
"a pair of input sequences). - 'only_first': Only truncate the first "
"sequence. - 'only_second': Only truncate the second sequence. - "
"'do_not_truncate': Do not truncate (raise an error if the input sequence "
"is longer than `max_seq_len`).  Defaults to 'longest_first'."
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.__call__:35
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.batch_encode:29
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.encode:21
msgid "String selected in the following options:"
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.__call__:37
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.batch_encode:31
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.encode:23
msgid "'longest_first' (default) Iteratively reduce the inputs sequence"
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.__call__:38
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.batch_encode:32
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.encode:24
msgid ""
"until the input is under `max_seq_len` starting from the longest one at "
"each token (when there is a pair of input sequences). - 'only_first': "
"Only truncate the first sequence. - 'only_second': Only truncate the "
"second sequence. - 'do_not_truncate': Do not truncate (raise an error if "
"the input sequence is longer than `max_seq_len`)."
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.__call__:45
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.batch_encode:39
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.encode:31
msgid "Defaults to 'longest_first'."
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.__call__:47
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.batch_encode:41
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.encode:33
msgid ""
"Whether to include tokens position ids in the returned dictionary. "
"Defaults to `False`."
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.__call__:50
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.batch_encode:44
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.encode:36
msgid ""
"Whether to include token type ids in the returned dictionary. Defaults to"
" `True`."
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.__call__:53
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.batch_encode:47
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.encode:39
msgid ""
"Whether to include the attention mask in the returned dictionary. "
"Defaults to `False`."
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.__call__:56
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.batch_encode:50
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.encode:42
msgid ""
"Whether to include the length of each encoded inputs in the returned "
"dictionary. Defaults to `False`."
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.__call__:59
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.batch_encode:53
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.encode:45
msgid ""
"Whether to include overflowing token information in the returned "
"dictionary. Defaults to `False`."
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.__call__:62
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.batch_encode:56
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.encode:48
msgid ""
"Whether to include special tokens mask information in the returned "
"dictionary. Defaults to `False`."
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.__call__:65
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.batch_encode:59
msgid ""
"Decide the format for returned encoded batch inputs. Only works when "
"input is a batch of data. ::     - If True, encoded inputs would be a "
"dictionary like:         {'input_ids': [[1, 4444, 4385, 1545, 6712],[1, "
"4444, 4385]],         'token_type_ids': [[0, 0, 0, 0, 0], [0, 0, 0]]}"
"     - If False, encoded inputs would be a list like:         "
"[{'input_ids': [1, 4444, 4385, 1545, 6712],           'token_type_ids': "
"[0, 0, 0, 0, 0]},          {'input_ids': [1, 4444, 4385], "
"'token_type_ids': [0, 0, 0]}]  Defaults to `True`."
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.__call__:65
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.batch_encode:59
msgid ""
"Decide the format for returned encoded batch inputs. Only works when "
"input is a batch of data. ::"
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.__call__:76
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.batch_encode:70
msgid "Defaults to `True`."
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.__call__:78
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.batch_encode:72
msgid ""
"Whether to include the list of pair preserving the index of start and end"
" char in original input for each token in the returned dictionary. Would "
"be automatically set to `True` when `stride` > 0. Defaults to `False`."
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.__call__:83
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.batch_encode:77
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.encode:55
msgid ""
"Whether to add the special tokens associated with the corresponding model"
" to the encoded inputs. Defaults to `True`"
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.__call__
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.batch_encode
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.build_inputs_with_special_tokens
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.build_offset_mapping_with_special_tokens
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.convert_tokens_to_string
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.create_token_type_ids_from_sequences
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.encode
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.from_pretrained
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.get_offset_mapping
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.get_special_tokens_mask
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.load_vocabulary
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.num_special_tokens_to_add
#: paddlenlp.transformers.tokenizer_utils.convert_to_unicode
msgid "返回"
msgstr "Returns"

#: of paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.__call__:87
msgid ""
"The dict has the following optional items:  - **input_ids** (list[int] or"
" list[list[int]]): List of token ids to be fed to a model. - "
"**position_ids** (list[int] or list[list[int]], optional): List of token "
"position ids to be   fed to a model. Included when `return_position_ids` "
"is `True` - **token_type_ids** (list[int] or list[list[int]], optional): "
"List of token type ids to be   fed to a model. Included when "
"`return_token_type_ids` is `True`. - **attention_mask** (list[int] or "
"list[list[int]], optional): List of integers valued 0 or 1,   where 0 "
"specifies paddings and should not be attended to by the   model. Included"
" when `return_attention_mask` is `True`. - **seq_len** (int or list[int],"
" optional): The input_ids length. Included when `return_length`   is "
"`True`. - **overflowing_tokens** (list[int] or list[list[int]], "
"optional): List of overflowing tokens.   Included when if `max_seq_len` "
"is specified and `return_overflowing_tokens`   is True. - "
"**num_truncated_tokens** (int or list[int], optional): The number of "
"overflowing tokens.   Included when if `max_seq_len` is specified and "
"`return_overflowing_tokens`   is True. - **special_tokens_mask** "
"(list[int] or list[list[int]], optional): List of integers valued 0 or 1,"
"   with 0 specifying special added tokens and 1 specifying sequence "
"tokens.   Included when `return_special_tokens_mask` is `True`. - "
"**offset_mapping** (list[int], optional): list of pair preserving the   "
"index of start and end char in original input for each token.   For a "
"sqecial token, the index pair is `(0, 0)`. Included when   "
"`return_overflowing_tokens` is True or `stride` > 0. - "
"**overflow_to_sample** (int or list[int], optional): Index of example "
"from which this   feature is generated. Included when `stride` works."
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.__call__:87
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.batch_encode:81
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.encode:59
msgid "The dict has the following optional items:"
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.__call__:89
msgid ""
"**input_ids** (list[int] or list[list[int]]): List of token ids to be fed"
" to a model."
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.__call__:90
msgid ""
"**position_ids** (list[int] or list[list[int]], optional): List of token "
"position ids to be fed to a model. Included when `return_position_ids` is"
" `True`"
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.__call__:92
msgid ""
"**token_type_ids** (list[int] or list[list[int]], optional): List of "
"token type ids to be fed to a model. Included when "
"`return_token_type_ids` is `True`."
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.__call__:94
msgid ""
"**attention_mask** (list[int] or list[list[int]], optional): List of "
"integers valued 0 or 1, where 0 specifies paddings and should not be "
"attended to by the model. Included when `return_attention_mask` is "
"`True`."
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.__call__:97
msgid ""
"**seq_len** (int or list[int], optional): The input_ids length. Included "
"when `return_length` is `True`."
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.__call__:99
msgid ""
"**overflowing_tokens** (list[int] or list[list[int]], optional): List of "
"overflowing tokens. Included when if `max_seq_len` is specified and "
"`return_overflowing_tokens` is True."
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.__call__:102
msgid ""
"**num_truncated_tokens** (int or list[int], optional): The number of "
"overflowing tokens. Included when if `max_seq_len` is specified and "
"`return_overflowing_tokens` is True."
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.__call__:105
msgid ""
"**special_tokens_mask** (list[int] or list[list[int]], optional): List of"
" integers valued 0 or 1, with 0 specifying special added tokens and 1 "
"specifying sequence tokens. Included when `return_special_tokens_mask` is"
" `True`."
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.__call__:108
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.batch_encode:102
msgid ""
"**offset_mapping** (list[int], optional): list of pair preserving the "
"index of start and end char in original input for each token. For a "
"sqecial token, the index pair is `(0, 0)`. Included when "
"`return_overflowing_tokens` is True or `stride` > 0."
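msgstr ""
"**offset_mapping** (list[int], optional): list of pair preserving the "
"index of start and end char in original input for each token. For a "
"special token, the index pair is `(0, 0)`. Included when "
"`return_overflowing_tokens` is True or `stride` > 0."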

#: of paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.__call__:112
msgid ""
"**overflow_to_sample** (int or list[int], optional): Index of example "
"from which this feature is generated. Included when `stride` works."
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.__call__
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.batch_encode
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.build_inputs_with_special_tokens
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.build_offset_mapping_with_special_tokens
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.convert_tokens_to_string
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.create_token_type_ids_from_sequences
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.encode
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.from_pretrained
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.get_offset_mapping
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.get_special_tokens_mask
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.load_vocabulary
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.num_special_tokens_to_add
#: paddlenlp.transformers.tokenizer_utils.convert_to_unicode
msgid "返回类型"
msgstr "Return type"

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.all_special_tokens:1
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.all_special_tokens_extended:1
msgid ""
"All the special tokens ('<unk>', '<cls>'...) corresponding to special "
"token arguments in `__init__` (arguments end with '_end')."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.all_special_ids
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.all_special_tokens
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.all_special_tokens_extended
msgid "type"
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.all_special_ids:3
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.all_special_tokens:4
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.all_special_tokens_extended:4
msgid "list"
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.all_special_ids:1
msgid "All the token ids corresponding to all the special tokens."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.convert_tokens_to_string:1
msgid ""
"Converts a sequence of tokens (list of string) to a single string by "
"using ``' '.join(tokens)`` ."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.convert_tokens_to_string:4
msgid "A sequence of tokens."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.convert_tokens_to_string:7
msgid "Converted string."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.from_pretrained:1
msgid ""
"Creates an instance of `PretrainedTokenizer`. Related resources are "
"loaded by specifying name of a built-in pretrained model, or a community-"
"contributed pretrained model, or a local file directory path."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.from_pretrained:5
msgid ""
"Name of pretrained model or dir path to load from. The string can be:  - "
"Name of built-in pretrained model - Name of a community-contributed "
"pretrained model. - Local directory path which contains tokenizer related"
" resources   and tokenizer config file (\"tokenizer_config.json\")."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.from_pretrained:5
msgid "Name of pretrained model or dir path to load from. The string can be:"
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.from_pretrained:8
msgid "Name of built-in pretrained model"
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.from_pretrained:9
msgid "Name of a community-contributed pretrained model."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.from_pretrained:10
msgid ""
"Local directory path which contains tokenizer related resources and "
"tokenizer config file (\"tokenizer_config.json\")."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.from_pretrained:13
msgid ""
"position arguments for model `__init__`. If provided, use these as "
"position argument values for tokenizer initialization."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.from_pretrained:16
msgid ""
"keyword arguments for model `__init__`. If provided, use these to update "
"pre-defined keyword argument values for tokenizer initialization."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.from_pretrained:21
msgid "An instance of `PretrainedTokenizer`."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.from_pretrained:25
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.save_pretrained:14
msgid "示例"
msgstr "Examples"

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.save_pretrained:1
msgid ""
"Save tokenizer configuration and related resources to files under "
"`save_directory`. The tokenizer configuration would be saved into "
"`tokenizer_config_file` indicating file (thus `tokenizer_config.json`), "
"and resources would be saved into `resource_files_names` indicating files"
" by using `self.save_resources(save_directory)`."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.save_pretrained:7
msgid ""
"The `save_directory` can be used in `from_pretrained` as argument value "
"of `pretrained_model_name_or_path` to re-load the tokenizer."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.save_pretrained:10
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.save_resources:4
msgid "Directory to save files into."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.save_resources:1
msgid ""
"Save tokenizer related resources to `resource_files_names` indicating "
"files under `save_directory` by copying directly. Override it if "
"necessary."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.load_vocabulary:1
msgid ""
"Instantiate an instance of `Vocab` from a file reserving all tokens by "
"using `Vocab.from_dict`. The file contains a token per line, and the line"
" number would be the index of corresponding token."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.load_vocabulary:5
msgid "path of file to construct vocabulary."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.load_vocabulary:7
msgid ""
"special token for unknown token. If no need, it also could be `None`. "
"Defaults to `None`."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.load_vocabulary:10
msgid ""
"special token for padding token. If no need, it also could be `None`. "
"Defaults to `None`."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.load_vocabulary:13
msgid ""
"special token for bos token. If no need, it also could be `None`. "
"Defaults to `None`."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.load_vocabulary:16
msgid ""
"special token for eos token. If no need, it also could be `None`. "
"Defaults to `None`."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.load_vocabulary:19
msgid "keyword arguments for `Vocab.from_dict`."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.load_vocabulary:22
msgid "An instance of `Vocab`."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.save_vocabulary:1
msgid ""
"Save all tokens to a vocabulary file. The file contains a token per line,"
" and the line number would be the index of corresponding token."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.save_vocabulary:4
msgid "File path to be saved to."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.save_vocabulary:6
msgid "The `Vocab` or `dict` instance to be saved."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.truncate_sequences:1
msgid "Truncates a sequence pair in place to the maximum length."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.truncate_sequences:3
msgid ""
"list of tokenized input ids. Can be obtained from a string by chaining "
"the `tokenize` and `convert_tokens_to_ids` methods."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.truncate_sequences:5
msgid ""
"Optional second list of input ids. Can be obtained from a string by "
"chaining the `tokenize` and `convert_tokens_to_ids` methods."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.truncate_sequences:7
msgid "number of tokens to remove using the truncation strategy"
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.truncate_sequences:9
msgid ""
"string selected in the following options: - 'longest_first' (default) "
"Iteratively reduce the inputs sequence until the input is under "
"max_seq_len     starting from the longest one at each token (when there "
"is a pair of input sequences).     Overflowing tokens only contains "
"overflow from the first sequence. - 'only_first': Only truncate the first"
" sequence. raise an error if the first sequence is shorter or equal to "
"than num_tokens_to_remove. - 'only_second': Only truncate the second "
"sequence - 'do_not_truncate': Does not truncate (raise an error if the "
"input sequence is longer than max_seq_len)"
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.truncate_sequences:9
msgid ""
"string selected in the following options: - 'longest_first' (default) "
"Iteratively reduce the inputs sequence until the input is under "
"max_seq_len"
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.truncate_sequences:11
msgid ""
"starting from the longest one at each token (when there is a pair of "
"input sequences). Overflowing tokens only contains overflow from the "
"first sequence."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.truncate_sequences:13
msgid ""
"'only_first': Only truncate the first sequence. raise an error if the "
"first sequence is shorter or equal to than num_tokens_to_remove."
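msgstr ""
"'only_first': Only truncate the first sequence. Raise an error if the "
"first sequence is shorter than or equal to num_tokens_to_remove."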

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.truncate_sequences:14
msgid "'only_second': Only truncate the second sequence"
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.truncate_sequences:15
msgid ""
"'do_not_truncate': Does not truncate (raise an error if the input "
"sequence is longer than max_seq_len)"
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.truncate_sequences:16
msgid ""
"If set to a number along with max_seq_len, the overflowing tokens "
"returned will contain some tokens from the main sequence returned. The "
"value of this argument defines the number of additional tokens."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.build_inputs_with_special_tokens:1
msgid ""
"Build model inputs from a sequence or a pair of sequence for sequence "
"classification tasks by concatenating and adding special tokens."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.build_inputs_with_special_tokens:4
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.build_offset_mapping_with_special_tokens:3
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.create_token_type_ids_from_sequences:3
msgid ""
"Should be overridden in a subclass if the model has a special way of "
"building those."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.build_inputs_with_special_tokens:6
msgid "List of IDs to which the special tokens will be added."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.build_inputs_with_special_tokens:8
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.create_token_type_ids_from_sequences:10
msgid "Optional second list of IDs for sequence pairs."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.build_inputs_with_special_tokens:11
msgid "List of input_id with the appropriate special tokens."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.build_offset_mapping_with_special_tokens:1
msgid ""
"Build offset map from a pair of offset maps by concatenating and adding "
"offsets of special tokens."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.build_offset_mapping_with_special_tokens:5
msgid "List of char offsets to which the special tokens will be added."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.build_offset_mapping_with_special_tokens:7
msgid "Optional second list of char offsets for offset mapping pairs."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.build_offset_mapping_with_special_tokens:10
msgid "List of char offsets with the appropriate offsets of special tokens."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.get_special_tokens_mask:1
msgid ""
"Retrieves sequence ids from a token list that has no special tokens "
"added. This method is called when adding special tokens using the "
"tokenizer ``encode`` methods."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.get_special_tokens_mask:4
msgid "List of ids of the first sequence."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.get_special_tokens_mask:6
msgid "List of ids of the second sequence."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.get_special_tokens_mask:8
msgid ""
"Whether or not the token list is already formatted with special tokens "
"for the model. Defaults to None."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.get_special_tokens_mask:12
msgid ""
"The list of integers in the range [0, 1]:     1 for a special token, 0 "
"for a sequence token."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.get_special_tokens_mask:14
msgid "The list of integers in the range [0, 1]:"
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.get_special_tokens_mask:15
msgid "1 for a special token, 0 for a sequence token."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.create_token_type_ids_from_sequences:1
msgid ""
"Create a mask from the two sequences passed to be used in a sequence-pair"
" classification task."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.create_token_type_ids_from_sequences:6
msgid ""
"If `token_ids_1` is `None`, this method only returns the first portion of"
" the mask (0s)."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.create_token_type_ids_from_sequences:8
msgid "List of IDs."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.create_token_type_ids_from_sequences:13
msgid "List of token_type_id according to the given sequence(s)."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.num_special_tokens_to_add:1
msgid ""
"Returns the number of added tokens when encoding a sequence with special "
"tokens."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.num_special_tokens_to_add:3
msgid ""
"Whether the number of added tokens should be computed in the case of a "
"sequence pair or a single sequence. Defaults to `False`."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.num_special_tokens_to_add:7
msgid "Number of special tokens added to sequences."
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.encode:1
msgid ""
"Performs tokenization and uses the tokenized tokens to prepare model "
"inputs. It supports sequence or sequence pair as input, and batch input "
"is not allowed."
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.encode:5
msgid ""
"The sequence to be processed. One sequence is a string, a list of "
"strings, or a list of integers depending on whether it has been "
"pretokenized and converted to ids."
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.encode:51
msgid ""
"Whether to include the list of pair preserving the index of start and end"
" char in original input for each token in the returned dictionary. "
"Defaults to `False`."
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.encode:59
msgid ""
"The dict has the following optional items:  - **input_ids** (list[int]): "
"List of token ids to be fed to a model. - **position_ids** (list[int], "
"optional): List of token position ids to be   fed to a model. Included "
"when `return_position_ids` is `True` - **token_type_ids** (list[int], "
"optional): List of token type ids to be   fed to a model. Included when "
"`return_token_type_ids` is `True`. - **attention_mask** (list[int], "
"optional): List of integers valued 0 or 1,   where 0 specifies paddings "
"and should not be attended to by the   model. Included when "
"`return_attention_mask` is `True`. - **seq_len** (int, optional): The "
"input_ids length. Included when `return_length`   is `True`. - "
"**overflowing_tokens** (list[int], optional): List of overflowing tokens."
"   Included when `max_seq_len` is specified and "
"`return_overflowing_tokens`   is True. - **num_truncated_tokens** (int, "
"optional): The number of overflowing tokens.   Included when "
"`max_seq_len` is specified and `return_overflowing_tokens`   is True. - "
"**special_tokens_mask** (list[int], optional): List of integers valued 0 "
"or 1,   with 0 specifying special added tokens and 1 specifying sequence "
"tokens.   Included when `return_special_tokens_mask` is `True`. - "
"**offset_mapping** (list[int], optional): list of pair preserving the   "
"index of start and end char in original input for each token.   For a "
"special token, the index pair is `(0, 0)`. Included when   "
"`return_overflowing_tokens` is True."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.batch_encode:83
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.encode:61
msgid "**input_ids** (list[int]): List of token ids to be fed to a model."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.batch_encode:84
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.encode:62
msgid ""
"**position_ids** (list[int], optional): List of token position ids to be "
"fed to a model. Included when `return_position_ids` is `True`"
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.batch_encode:86
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.encode:64
msgid ""
"**token_type_ids** (list[int], optional): List of token type ids to be "
"fed to a model. Included when `return_token_type_ids` is `True`."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.batch_encode:88
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.encode:66
msgid ""
"**attention_mask** (list[int], optional): List of integers valued 0 or 1,"
" where 0 specifies paddings and should not be attended to by the model. "
"Included when `return_attention_mask` is `True`."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.batch_encode:91
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.encode:69
msgid ""
"**seq_len** (int, optional): The input_ids length. Included when "
"`return_length` is `True`."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.batch_encode:93
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.encode:71
msgid ""
"**overflowing_tokens** (list[int], optional): List of overflowing tokens."
" Included when `max_seq_len` is specified and "
"`return_overflowing_tokens` is True."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.batch_encode:96
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.encode:74
msgid ""
"**num_truncated_tokens** (int, optional): The number of overflowing "
"tokens. Included when `max_seq_len` is specified and "
"`return_overflowing_tokens` is True."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.batch_encode:99
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.encode:77
msgid ""
"**special_tokens_mask** (list[int], optional): List of integers valued 0 "
"or 1, with 0 specifying special added tokens and 1 specifying sequence "
"tokens. Included when `return_special_tokens_mask` is `True`."
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.encode:80
msgid ""
"**offset_mapping** (list[int], optional): list of pair preserving the "
"index of start and end char in original input for each token. For a "
"special token, the index pair is `(0, 0)`. Included when "
"`return_overflowing_tokens` is True."
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.batch_encode:1
msgid ""
"Performs tokenization and uses the tokenized tokens to prepare model "
"inputs. It supports batch inputs of sequence or sequence pair."
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.batch_encode:4
msgid ""
"The element of list can be sequence or sequence pair, and the sequence is"
" a string or a list of strings depending on whether it has been "
"pretokenized. If each sequence is provided as a list of strings "
"(pretokenized), you must set `is_split_into_words` as `True` to "
"disambiguate with a sequence pair."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.batch_encode:81
msgid ""
"The dict has the following optional items:  - **input_ids** (list[int]): "
"List of token ids to be fed to a model. - **position_ids** (list[int], "
"optional): List of token position ids to be   fed to a model. Included "
"when `return_position_ids` is `True` - **token_type_ids** (list[int], "
"optional): List of token type ids to be   fed to a model. Included when "
"`return_token_type_ids` is `True`. - **attention_mask** (list[int], "
"optional): List of integers valued 0 or 1,   where 0 specifies paddings "
"and should not be attended to by the   model. Included when "
"`return_attention_mask` is `True`. - **seq_len** (int, optional): The "
"input_ids length. Included when `return_length`   is `True`. - "
"**overflowing_tokens** (list[int], optional): List of overflowing tokens."
"   Included when `max_seq_len` is specified and "
"`return_overflowing_tokens`   is True. - **num_truncated_tokens** (int, "
"optional): The number of overflowing tokens.   Included when "
"`max_seq_len` is specified and `return_overflowing_tokens`   is True. - "
"**special_tokens_mask** (list[int], optional): List of integers valued 0 "
"or 1,   with 0 specifying special added tokens and 1 specifying sequence "
"tokens.   Included when `return_special_tokens_mask` is `True`. - "
"**offset_mapping** (list[int], optional): list of pair preserving the   "
"index of start and end char in original input for each token.   For a "
"special token, the index pair is `(0, 0)`. Included when   "
"`return_overflowing_tokens` is True or `stride` > 0. - "
"**overflow_to_sample** (int, optional): Index of example from which this"
"   feature is generated. Included when `stride` works."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.batch_encode:106
msgid ""
"**overflow_to_sample** (int, optional): Index of example from which this "
"feature is generated. Included when `stride` works."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.get_offset_mapping:1
msgid ""
"Returns the map of tokens and the start and end index of their start and "
"end character. Modified from "
"https://github.com/bojone/bert4keras/blob/master/bert4keras/tokenizers.py#L372"
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.get_offset_mapping:4
msgid "Input text."
msgstr ""

#: of
#: paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer.get_offset_mapping:7
msgid "The offset map of input text."
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.BPETokenizer:1
msgid "基类：:class:`paddlenlp.transformers.tokenizer_utils.PretrainedTokenizer`"
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.BPETokenizer:1
msgid ""
"The base class for all bpe tokenizers. It mainly provides common tokenize"
" methods for bpe type tokenizer."
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.BPETokenizer:4
msgid "file path of the vocabulary."
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.BPETokenizer:6
msgid "file path of the id to vocab."
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.BPETokenizer:8
msgid "file path of word merge text."
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.BPETokenizer:10
msgid "The special token for unknown words. Defaults to \"[UNK]\"."
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.BPETokenizer:13
msgid "The special token for separator token. Defaults to \"[SEP]\"."
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.BPETokenizer:16
msgid "The special token for padding. Defaults to \"[PAD]\"."
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.BPETokenizer:19
msgid "The special token for cls. Defaults to \"[CLS]\"."
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.BPETokenizer:22
msgid "The special token for mask. Defaults to \"[MASK]\"."
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.tokenize_chinese_chars:1
msgid "Adds whitespace around any CJK character."
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.is_chinese_char:1
msgid "Checks whether CP is the codepoint of a CJK character."
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.normalize_chars:1
msgid ""
"Normalize the text for multilingual and Chinese models. Unicode range: "
"https://www.ling.upenn.edu/courses/Spring_2003/ling538/UnicodeRanges.html"
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.tokenize_special_chars:1
msgid "Adds whitespace around any special character."
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.convert_to_unicode:1
msgid ""
"Converts `text` to Unicode (if it's not already), assuming utf-8 input. "
":param text: Text to be converted to unicode. :type text: str|bytes"
msgstr ""

#: of paddlenlp.transformers.tokenizer_utils.convert_to_unicode:5
msgid "converted text."
msgstr ""

#~ msgid ""
#~ "The dict has the following optional "
#~ "items:  - **input_ids** (list[int]): List "
#~ "of token ids to be fed to a"
#~ " model. - **position_ids** (list[int], "
#~ "optional): List of token position ids"
#~ " to be   fed to a model. "
#~ "Included when `return_position_ids` is `True`"
#~ " - **token_type_ids** (list[int], optional): "
#~ "List of token type ids to be   "
#~ "fed to a model. Included when "
#~ "`return_token_type_ids` is `True`. - "
#~ "**attention_mask** (list[int], optional): List "
#~ "of integers valued 0 or 1,   where"
#~ " 0 specifies paddings and should not"
#~ " be attended to by the   model. "
#~ "Included when `return_attention_mask` is "
#~ "`True`. - **seq_len** (int, optional): "
#~ "The input_ids length. Included when "
#~ "`return_length`   is `True`. - "
#~ "**overflowing_tokens** (list[int], optional): List"
#~ " of overflowing tokens.   Included when "
#~ "if `max_seq_len` is specified and "
#~ "`return_overflowing_tokens`   is True. - "
#~ "**num_truncated_tokens** (int, optional): The "
#~ "number of overflowing tokens.   Included "
#~ "when if `max_seq_len` is specified and"
#~ " `return_overflowing_tokens`   is True. - "
#~ "**special_tokens_mask** (list[int], optional): List"
#~ " of integers valued 0 or 1,   "
#~ "with 0 specifying special added tokens"
#~ " and 1 specifying sequence tokens.   "
#~ "Included when `return_special_tokens_mask` is "
#~ "`True`. - **offset_mapping** (list[int], "
#~ "optional): list of pair preserving the"
#~ "   index of start and end char "
#~ "in original input for each token.   "
#~ "For a special token, the index "
#~ "pair is `(0, 0)`. Included when   "
#~ "`stride` works. - **overflow_to_sample** (int,"
#~ " optional): Index of example from "
#~ "which this   feature is generated. "
#~ "Included when `stride` works."
#~ msgstr ""

#~ msgid ""
#~ "**offset_mapping** (list[int], optional): list "
#~ "of pair preserving the index of "
#~ "start and end char in original "
#~ "input for each token. For a "
#~ "special token, the index pair is "
#~ "`(0, 0)`. Included when `stride` works."
#~ msgstr ""

#~ msgid ""
#~ "The dict has the following optional "
#~ "items:  - **input_ids** (list[int]): List "
#~ "of token ids to be fed to a"
#~ " model. - **position_ids** (list[int], "
#~ "optional): List of token position ids"
#~ " to be   fed to a model. "
#~ "Included when `return_position_ids` is `True`"
#~ " - **token_type_ids** (list[int], optional): "
#~ "List of token type ids to be   "
#~ "fed to a model. Included when "
#~ "`return_token_type_ids` is `True`. - "
#~ "**attention_mask** (list[int], optional): List "
#~ "of integers valued 0 or 1,   where"
#~ " 0 specifies paddings and should not"
#~ " be attended to by the   model. "
#~ "Included when `return_attention_mask` is "
#~ "`True`. - **seq_len** (int, optional): "
#~ "The input_ids length. Included when "
#~ "`return_length`   is `True`. - "
#~ "**overflowing_tokens** (list[int], optional): List"
#~ " of overflowing tokens.   Included when "
#~ "if `max_seq_len` is specified and "
#~ "`return_overflowing_tokens`   is True. - "
#~ "**num_truncated_tokens** (int, optional): The "
#~ "number of overflowing tokens.   Included "
#~ "when if `max_seq_len` is specified and"
#~ " `return_overflowing_tokens`   is True. - "
#~ "**special_tokens_mask** (list[int], optional): List"
#~ " of integers valued 0 or 1,   "
#~ "with 0 specifying special added tokens"
#~ " and 1 specifying sequence tokens.   "
#~ "Included when `return_special_tokens_mask` is "
#~ "`True`."
#~ msgstr ""

#~ msgid ""
#~ "The dict has the following optional "
#~ "items:  - **input_ids** (list[int]): List "
#~ "of token ids to be fed to a"
#~ " model. - **position_ids** (list[int], "
#~ "optional): List of token position ids"
#~ " to be   fed to a model. "
#~ "Included when `return_position_ids` is `True`"
#~ " - **token_type_ids** (list[int], optional): "
#~ "List of token type ids to be   "
#~ "fed to a model. Included when "
#~ "`return_token_type_ids` is `True`. - "
#~ "**attention_mask** (list[int], optional): List "
#~ "of integers valued 0 or 1,   where"
#~ " 0 specifies paddings and should not"
#~ " be attended to by the   model. "
#~ "Included when `return_attention_mask` is "
#~ "`True`. - **seq_len** (int, optional): "
#~ "The input_ids length. Included when "
#~ "`return_length`   is `True`. - "
#~ "**overflowing_tokens** (list[int], optional): List"
#~ " of overflowing tokens.   Included when "
#~ "if `max_seq_len` is specified and "
#~ "`return_overflowing_tokens`   is True. - "
#~ "**num_truncated_tokens** (int, optional): The "
#~ "number of overflowing tokens.   Included "
#~ "when if `max_seq_len` is specified and"
#~ " `return_overflowing_tokens`   is True. - "
#~ "**special_tokens_mask** (list[int], optional): List"
#~ " of integers valued 0 or 1,   "
#~ "with 0 specifying special added tokens"
#~ " and 1 specifying sequence tokens.   "
#~ "Included when `return_special_tokens_mask` is "
#~ "`True`. - **offset_mapping** (list[int], "
#~ "optional): list of pair preserving the"
#~ "   index of start and end char "
#~ "in original input for each token.   "
#~ "For a sqecial token, the index "
#~ "pair is `(0, 0)`. Included when   "
#~ "`stride` works. - **overflow_to_sample** (int,"
#~ " optional): Index of example from "
#~ "which this   feature is generated. "
#~ "Included when `stride` works."
#~ msgstr ""

#~ msgid ""
#~ "**offset_mapping** (list[int], optional): list "
#~ "of pair preserving the index of "
#~ "start and end char in original "
#~ "input for each token. For a "
#~ "sqecial token, the index pair is "
#~ "`(0, 0)`. Included when `stride` works."
#~ msgstr ""

